###Figures 2, 3 and 4


library(foreign)
library(readstata13)
library(dplyr)

## Point R at the replication folder and pull in the saved workspace.
## NOTE(review): the rest of the script uses an object called `data`;
## presumably it is restored by this load() call -- confirm.
replication_dir <- "C:/Users/kevin/Dropbox/Social_Media_Lab/Data_Survey/YouGov_Data/apsr_replication/"
setwd(replication_dir)
load(file = "merged_nr_soma.RData")



## Recode the Twitter frequency use variable to better reflect the underlying categories
## Roughly, how many times per month do you go on Twitter?
##
## Survey code -> approximate visits per month. A base-R named-vector lookup is
## used because `revalue()` lives in plyr, which this script never attaches
## (only foreign, readstata13 and dplyr are loaded), so the call would fail.
twitter_times_per_month <- c("1" = 0, "2" = 1, "3" = 2, "4" = 7,
                             "5" = 15, "6" = 30, "7" = 90)

twitter_code <- as.character(data$twitter_freq_inc)
twitter_recoded <- unname(twitter_times_per_month[twitter_code])

## Codes outside 1-7 are passed through unchanged and coerced to numeric,
## which is what revalue() followed by as.numeric() would have done.
unmapped <- is.na(twitter_recoded) & !is.na(twitter_code)
twitter_recoded[unmapped] <- as.numeric(twitter_code[unmapped])

data$twitter_freq_inc <- twitter_recoded

### Shift the all-topics tweet count by 1.0001 so that log() of it is strictly
### positive, which keeps the reciprocal below finite (no division by zero).
### NOTE(review): despite its name, `all_tweets_log` stores 1 / log(count),
### not log(count) -- confirm this is intended.
tweets_shifted <- 1.0001 + data$tweets_all_topics

data$tweets_all_topics <- tweets_shifted
data$all_tweets_log <- 1 / log(tweets_shifted)

#########################################################################
##Loop over all models
#########################################################################

## Covariates appended to the right-hand side of every model formula.
controls <- c(
  "woman", "age", "lowerclass", "profile_education_age",
  "white_british", "married", "newsnight_freq", "religious",
  "internet_freq_inc", "newspaper_type"
)

## Party labels in their various spellings.
full_parties <- c("Labour", "UKIP", "LibDem", "Tories")
parties <- c("labour", "UKIP", "libdem", "conserv")
t_parties <- c("labo", "ukip", "lide", "tory")
pid <- c(2, 5, 3, 1)

## Issue labels; `issues` drives the model loop, `t_issues` is the
## spelling pasted into the tweet-count column names.
issues <- c("EU", "spending", "immigration")
t_issues <- c("eu", "economy", "immigr")
tt_issues <- c("isis", "econ", "imm")
qs <- c("isis", "unemployment", "immigrants")

## Survey-wave and tweet-period labels.
waves <- c("w1", "w2", "w3", "w4")
t_waves <- c("p1", "p2", "p3", "p4")





##initialize
## Empty containers that the model loop appends to / fills by index.
names <- list()

models_full_raw <- list()
models_full <- list()
models_combo <- list()

## Per-party and per-media-leaning holders for estimates and standard errors.
labo_means <- labo_ses <- list()
ukip_means <- ukip_ses <- list()
lide_means <- lide_ses <- list()
tory_means <- tory_ses <- list()
right_media_means <- right_media_ses <- list()
cent_media_means <- cent_media_ses <- list()
left_media_means <- left_media_ses <- list()

total <- list()
total_relevant_tweets <- list()

n <- list()

## Issue spellings used when pasting together column names:
## `c_issues` for the soft_correct_* outcomes, `t_issues` for tweet counts.
c_issues <- c("EU", "spend", "immigration")
t_issues <- c("eu", "economy", "immigr")
 

## ---------------------------------------------------------------------------
## Fit the wave-1 -> wave-4 logistic models, one pass per issue.
##
## For each issue j the loop
##   * sets data$outcome / data$baseline to |soft_correct_<issue>_w4| / _w1,
##   * sums issue-specific tweet counts over periods p2-p4 by party and by
##     media leaning,
##   * builds logged aggregates (parties, media, total),
##   * stores the fitted glm (models_full_raw), its summary (models_full) and
##     the summary of the parties/media specification (models_combo).
##
## Columns are read with `[[ ]]` rather than eval(parse(text = ...)).
## NOTE(review): `[[ ]]` matches column names exactly, whereas the former
## `data$<name>` form allowed partial matching; the helper below stops with a
## clear message if an expected column is absent.
## ---------------------------------------------------------------------------

## Element-wise sum of the named columns of `df`, added left to right.
sum_data_columns <- function(df, cols) {
  absent <- setdiff(cols, colnames(df))
  if (length(absent) > 0) {
    stop("Columns not found in data: ", paste(absent, collapse = ", "),
         call. = FALSE)
  }
  Reduce(`+`, lapply(cols, function(col) df[[col]]))
}

## Period prefixes, in the order in which they are summed.
party_periods <- c("p3_1", "p3_2", "p2_1", "p2_2", "p4_1", "p4_2")
media_periods <- c("p3", "p2", "p4")
party_codes <- c("labo", "lide", "ukip", "tory")
media_leanings <- c("right", "left", "cent")

## Both specifications share the same controls and do not depend on j,
## so the formulas are built once, as real formula objects.
controls_rhs <- paste0(controls, collapse = "+")
full_formula <- as.formula(
  paste0("outcome ~ baseline + total + twitter_freq_inc + ", controls_rhs)
)
combo_formula <- as.formula(
  paste0("outcome ~ baseline + media + parties + twitter_freq_inc + ",
         controls_rhs)
)

for (j in seq_along(issues)) {

  ## define outcome variables
  names <- c(names, paste0(" ", issues[j]))
  data$outcome <- abs(
    sum_data_columns(data, paste0("soft_correct_", c_issues[j], "_w4"))
  )
  data$baseline <- abs(
    sum_data_columns(data, paste0("soft_correct_", c_issues[j], "_w1"))
  )

  ## Relevant tweets from each party, summed over the three final periods
  for (party in party_codes) {
    data[[paste0("tweet_count_", party)]] <- sum_data_columns(
      data, paste0(party_periods, "_", t_issues[j], "_", party)
    )
  }

  ## Relevant tweets from media accounts by leaning, raw and logged
  for (leaning in media_leanings) {
    media_col <- paste0(leaning, "_media")
    data[[media_col]] <- sum_data_columns(
      data, paste0(media_periods, "_media_", leaning, "_", t_issues[j])
    )
    data[[paste0(media_col, "_log")]] <- log(1.00001 + data[[media_col]])
  }

  ## create aggregate variables
  ## (The aggregates use a 1.0001 offset while the per-source logs use
  ## 1.00001; both are kept exactly as they were so estimates are unchanged.)
  party_tweets <- data$tweet_count_lide + data$tweet_count_labo +
    data$tweet_count_ukip + data$tweet_count_tory

  data$parties <- log(1.0001 + party_tweets)
  data$media <- log(1.0001 + data$cent_media + data$left_media +
                      data$right_media)
  data$total <- log(1.0001 + party_tweets +
                      data$cent_media + data$left_media + data$right_media)

  total <- c(total, min(data$total, na.rm = TRUE))
  total_relevant_tweets[[j]] <- data$total

  ## log the main variables (after the aggregates, which need raw counts)
  for (party in party_codes) {
    count_col <- paste0("tweet_count_", party)
    data[[count_col]] <- log(1.00001 + data[[count_col]])
  }

  ## model specifications

  ## Full model: fitted once, kept both raw (for effect plots) and summarised
  full_fit <- glm(full_formula, data = data, na.action = na.exclude,
                  family = "binomial")
  models_full_raw[[j]] <- full_fit
  models_full[[j]] <- summary(full_fit)

  ## combo: parties and media entered separately
  models_combo[[j]] <- summary(
    glm(combo_formula, data = data, na.action = na.exclude,
        family = "binomial")
  )
}


################################################################## graphing---all tweets
## Coefficient plot for the "total" term of the three full models:
## point estimate, tick marks at +/- 1.64 SE and a bar spanning +/- 1.96 SE.
full_issues <- c("EU", "Spending", "Immigration")

names <- c("EU", "Spending", "Immigration")
index <- c(1, 2, 3)

## Pull one cell of the "total" row out of a glm summary's coefficient table.
total_cell <- function(fit_summary, column) {
  fit_summary$coefficients["total", column]
}

## extract mean values and standard errors
means <- vapply(models_full[1:3], total_cell, numeric(1), column = "Estimate")
ses <- vapply(models_full[1:3], total_cell, numeric(1), column = "Std. Error")

## plot
pdf("results/all_w1w4_place_correct_total_1.pdf", 8, 4)
par(mar = c(4, 8, 4, 4))
plot(
  means, index,
  xlim = c(-.2, .2), ylab = "", yaxt = "n",
  xlab = "Logistic regression coefficients on number of relevant tweets",
  # title intentionally blank
  # (was: "Wave 1--Wave 4 Effects of Tweets on Placement Accuracy")
  main = "", pch = 3
)
axis(2, at = index, labels = names, las = 2)
abline(v = 0)

## tick marks at the 1.64-SE bounds, upper then lower
points(means + 1.64 * ses, index, pch = "|")
points(means - 1.64 * ses, index, pch = "|")

## horizontal bars spanning the 1.96-SE interval, one per coefficient
segments(means - 1.96 * ses, index, means + 1.96 * ses, index)

dev.off()
  
  


################################################################## graphing---all tweets OR
## Effect plots of the logged tweet total on the fitted probability, one PDF
## per issue, built from the raw glm fits stored in models_full_raw.
full_issues <- c("EU", "Spending", "Immigration")

## Output file suffixes, kept exactly as before (only the first carries "_1").
effect_pdf_suffix <- c("_w1w4_full_OR_1.pdf", "_w1w4_full_OR.pdf",
                       "_w1w4_full_OR.pdf")

## Printed explicitly so the values also appear when the script is source()d.
print(summary(data$total))
library(effects)

## Write the effect display for `total` from one fitted model to `pdf_path`.
## Returns the effects object invisibly.
save_total_effect_plot <- function(model, pdf_path) {
  eff <- allEffects(model, xlevels = list("total" = seq(0, 10, 1)))

  pdf(pdf_path, 8, 4)
  on.exit(dev.off(), add = TRUE)  # close the device even if plotting fails
  par(mar = c(4, 8, 4, 2))

  ## plot() on an effects object returns a lattice object; it is only drawn
  ## when printed, so print it explicitly rather than rely on autoprint.
  print(plot(eff, "total",
             xlab = "Log of Topical Tweets",
             ylab = "Probability of Correct Ranking W4",
             main = ""))

  invisible(eff)
}

for (k in seq_along(full_issues)) {
  ###KM 11/18/19
  ##calculate the standard deviations here to get effect sizes to report
  print(sd(total_relevant_tweets[[k]], na.rm = TRUE))

  if (k == 1) {
    ## hist() drops non-finite values itself and has no `na.rm` argument;
    ## drawn on the default device, before the PDF is opened.
    hist(total_relevant_tweets[[k]])
  }

  ##plot
  eff <- save_total_effect_plot(
    models_full_raw[[k]],
    paste0("results/", full_issues[k], effect_pdf_suffix[k])
  )
}
 
    
    ################graphing--combined results
    ## Coefficient plot for the parties / media specification: for every issue
    ## the "parties" and "media" estimates with 1.64-SE ticks and 1.96-SE bars.

    full_issues <- c("EU", "Spending", "Immigration")

    ## c(parties, media) cells from one column of a model summary's
    ## coefficient table, without names.
    combo_cells <- function(fit_summary, column) {
      unname(fit_summary$coefficients[c("parties", "media"), column])
    }

    ## Flattened model by model: parties, media, parties, media, ...
    all_means <- as.vector(
      vapply(models_combo, combo_cells, numeric(2), column = "Estimate")
    )
    all_ses <- as.vector(
      vapply(models_combo, combo_cells, numeric(2), column = "Std. Error")
    )

    ####################################

    #Combine into one graph

    ##plot
    pdf(paste0("results/all_w1w4_place_correct_combo_1.pdf"), 12, 4)
    par(mar = c(4, 10, 4, 2))
    index <- seq(.1, .6, length.out = 6)
    plot(
      all_means, index,
      xlim = c(-.2, .2), ylim = c(.09, .61), ylab = "", yaxt = "n",
      xlab = "Logistic regression coefficients on number of relevant tweets",
      # title intentionally blank
      # (was: "Wave 1--Wave 4 Effects of Tweets on Party Placement Accuracy")
      main = "", pch = 3
    )
    names <- c(
      "EU: Parties", "EU: Media",
      "Spending: Parties", "Spending: Media",
      "Immigration: Parties", "Immigration: Media"
    )
    axis(2, at = index, labels = names, las = 2)
    abline(v = 0)

    ## tick marks at the 1.64-SE bounds, upper then lower
    points(all_means + 1.64 * all_ses, index, pch = "|")
    points(all_means - 1.64 * all_ses, index, pch = "|")

    ## horizontal bars spanning the 1.96-SE interval, one per coefficient
    segments(all_means - 1.96 * all_ses, index,
             all_means + 1.96 * all_ses, index)

    dev.off()
    
